In this notebook we see how well we can reproduce Kd from simulated experimental data with our Bayesian methods, these are the same as those used in the quickmodel.py
script. We will be testing to see if sampling using the emcee
library improves our equilibration time and ultimately, the results.
In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
import seaborn as sns
%pylab inline
We use the same setup here as we do in the 'Simulating Experimental Fluorescence Binding Data' notebook.
In [2]:
# We define a Kd,
Kd = 2e-9 # M
# a protein concentration,
Ptot = 1e-9 * np.ones([12],np.float64) # M
# and a gradient of ligand concentrations for our experiment.
Ltot = 20.0e-6 / np.array([10**(float(i)/2.0) for i in range(12)]) # M
In [3]:
def two_component_binding(Kd, Ptot, Ltot):
"""
Parameters
----------
Kd : float
Dissociation constant
Ptot : float
Total protein concentration
Ltot : float
Total ligand concentration
Returns
-------
P : float
Free protein concentration
L : float
Free ligand concentration
PL : float
Complex concentration
"""
PL = 0.5 * ((Ptot + Ltot + Kd) - np.sqrt((Ptot + Ltot + Kd)**2 - 4*Ptot*Ltot)) # complex concentration (uM)
P = Ptot - PL; # free protein concentration in sample cell after n injections (uM)
L = Ltot - PL; # free ligand concentration in sample cell after n injections (uM)
return [P, L, PL]
In [4]:
[L, P, PL] = two_component_binding(Kd, Ptot, Ltot)
In [5]:
# y will be complex concentration
# x will be total ligand concentration
plt.semilogx(Ltot,PL, 'o')
plt.xlabel('$[L]_{tot}$ / M')
plt.ylabel('$[PL]$ / M')
plt.ylim(0,1.3e-9)
plt.axhline(Ptot[0],color='0.75',linestyle='--',label='$[P]_{tot}$')
plt.legend();
In [6]:
# Making max 1400 relative fluorescence units, and scaling all of PL (complex concentration)
# to that, adding some random noise
npoints = len(Ltot)
sigma = 10.0 # size of noise
F_PL_i = (1400/1e-9)*PL + sigma * np.random.randn(npoints)
In [7]:
# y will be complex concentration
# x will be total ligand concentration
plt.semilogx(Ltot,F_PL_i, 'ro')
plt.xlabel('$[L]_{tot}$ / M')
plt.ylabel('$Fluorescendce$')
plt.legend();
In [8]:
#Let's add an F_background just so we don't ever go below zero
F_background = 40
#We also need to model fluorescence for our ligand
F_L_i = F_background + (.4/1e-8)*Ltot + sigma * np.random.randn(npoints)
#Let's also add these to our complex fluorescence readout
F_PL_i = F_background + ((1400/1e-9)*PL + sigma * np.random.randn(npoints)) + ((.4/1e-8)*L + sigma * np.random.randn(npoints))
In [9]:
# y will be complex concentration
# x will be total ligand concentration
plt.semilogx(Ltot,F_PL_i, 'ro')
plt.semilogx(Ltot,F_L_i, 'ko')
plt.xlabel('$[L]_{tot}$ / M')
plt.ylabel('$Fluorescence$')
plt.legend();
In [10]:
# We know errors from our pipetting instruments.
P_error = 0.35
L_error = 0.08
assay_volume = 100e-6 # assay volume, L
dPstated = P_error * Ptot
dLstated = L_error * Ltot
In [11]:
# Now we'll use our Bayesian modeling scheme from assaytools.
from assaytools import pymcmodels
pymc_model = pymcmodels.make_model(Ptot, dPstated, Ltot, dLstated,
top_complex_fluorescence=F_PL_i,
top_ligand_fluorescence=F_L_i,
use_primary_inner_filter_correction=True,
use_secondary_inner_filter_correction=True,
assay_volume=assay_volume, DG_prior='uniform')
In [12]:
mcmc = pymcmodels.run_mcmc(pymc_model)
In [13]:
import matplotlib.patches as mpatches #this is for plotting with color patches
In [14]:
def mcmc_three_plots(pymc_model,mcmc,Lstated):
sns.set(style='white')
sns.set_context('talk')
import pymbar
[t,g,Neff_max] = pymbar.timeseries.detectEquilibration(mcmc.DeltaG.trace())
interval= np.percentile(a=mcmc.DeltaG.trace()[t:], q=[2.5, 50.0, 97.5])
[hist,bin_edges] = np.histogram(mcmc.DeltaG.trace()[t:],bins=40,normed=True)
binwidth = np.abs(bin_edges[0]-bin_edges[1])
#set colors for 95% interval
clrs = [(0.7372549019607844, 0.5098039215686274, 0.7411764705882353) for xx in bin_edges]
idxs = bin_edges.argsort()
idxs = idxs[::-1]
gray_before = idxs[bin_edges[idxs] < interval[0]]
gray_after = idxs[bin_edges[idxs] > interval[2]]
for idx in gray_before:
clrs[idx] = (.5,.5,.5)
for idx in gray_after:
clrs[idx] = (.5,.5,.5)
plt.clf();
plt.figure(figsize=(12,3));
plt.subplot(131)
property_name = 'top_complex_fluorescence'
complex = getattr(pymc_model, property_name)
property_name = 'top_ligand_fluorescence'
ligand = getattr(pymc_model, property_name)
for top_complex_fluorescence_model in mcmc.top_complex_fluorescence_model.trace()[::10]:
plt.semilogx(Lstated, top_complex_fluorescence_model, marker='.',color='silver')
for top_ligand_fluorescence_model in mcmc.top_ligand_fluorescence_model.trace()[::10]:
plt.semilogx(Lstated, top_ligand_fluorescence_model, marker='.',color='lightcoral', alpha=0.2)
plt.semilogx(Lstated, complex.value, 'ko',label='complex')
plt.semilogx(Lstated, ligand.value, marker='o',color='firebrick',linestyle='None',label='ligand')
#plt.xlim(.5e-8,5e-5)
plt.xlabel('$[L]_T$ (M)');
plt.yticks([])
plt.ylabel('fluorescence');
plt.legend(loc=0);
plt.subplot(132)
plt.bar(bin_edges[:-1]+binwidth/2,hist,binwidth,color=clrs, edgecolor = "white");
sns.kdeplot(mcmc.DeltaG.trace()[t:],bw=.4,color=(0.39215686274509803, 0.7098039215686275, 0.803921568627451),shade=False)
plt.axvline(x=interval[0],color=(0.5,0.5,0.5),linestyle='--')
plt.axvline(x=interval[1],color=(0.5,0.5,0.5),linestyle='--')
plt.axvline(x=interval[2],color=(0.5,0.5,0.5),linestyle='--')
plt.axvline(x=np.log(Kd),color='k')
plt.xlabel('$\Delta G$ ($k_B T$)',fontsize=16);
plt.ylabel('$P(\Delta G)$',fontsize=16);
#plt.xlim(-15,-8)
hist_legend = mpatches.Patch(color=(0.7372549019607844, 0.5098039215686274, 0.7411764705882353),
label = '$\Delta G$ = %.3g [%.3g,%.3g] $k_B T$'
%(interval[1],interval[0],interval[2]) )
plt.legend(handles=[hist_legend],fontsize=10,loc=0,frameon=True);
plt.subplot(133)
plt.plot(range(0,t),mcmc.DeltaG.trace()[:t], 'g.',label='equil. at %s'%t)
plt.plot(range(t,len(mcmc.DeltaG.trace())),mcmc.DeltaG.trace()[t:], '.')
plt.xlabel('MCMC sample');
plt.ylabel('$\Delta G$ ($k_B T$)');
plt.legend(loc=2);
plt.tight_layout();
return [t,interval,hist,bin_edges,binwidth]
In [15]:
Kd
Out[15]:
In [16]:
print 'Real Kd is 2nm or %s k_B T.' %np.log(Kd)
In [17]:
[t,interval,hist,bin_edges,binwidth] = mcmc_three_plots(pymc_model,mcmc,Ltot)
That works, but the equilibration seems to happen quite late in our sampling! Let's look at some of the other parameters.
In [18]:
well_area = 0.1586 # well area, cm^2 # half-area wells were used here
path_length = assay_volume / well_area
In [19]:
from assaytools import plots
plots.plot_mcmc_results(Ltot, Ptot, path_length, mcmc)
Following instructions as described here: http://twiecki.github.io/blog/2013/09/23/emcee-pymc/
In [12]:
mcmc_emcee = pymcmodels.run_mcmc_emcee(pymc_model)
In [ ]: